from lxml import etree

# Sample HTML fragment used as parser input.
# Note: the last <li> (item-0) has no closing </li> tag in this string.
text = ''' <div> <ul> 
        <li class="item-1"><a href="link1.html">first item</a></li> 
        <li class="item-1"><a href="link2.html">second item</a></li> 
        <li class="item-inactive"><a href="link3.html">third item</a></li> 
        <li class="item-1"><a href="link4.html">fourth item</a></li> 
        <li class="item-0"><a href="link5.html">fifth item</a> 
        </ul> </div> '''

# Parse the string with etree.HTML to obtain an Element object;
# Element objects expose the XPath query methods.
html = etree.HTML(text)

# Show the type of the parsed object on one line, then the object itself.
print(type(html), html, sep="\n")

# An Element object can be converted back into a string:
# handled_html_str = etree.tostring(html).decode()
# print(handled_html_str)
